In this notebook, we read the 'lif' files that resulted from manual annotation in labelIMG. They provide a bounding box that marks the 'OS' in the image. We want to use these as input for a classifier that segments the OS. We could generate a train data set by making patches that are centered around the center of the bbox, and patches that are completely outside of the bbox. The resulting classifier can classify a pixel by looking at the patch around it. This gives 'heatmap' of os-iness of the pixels. Hopefully we can take the maximum of the heatmap as the center of OS.



In [20]:

    
import matplotlib.pyplot as plt
%matplotlib inline
from bs4 import BeautifulSoup
import os
import cv2



In [19]:

    
lif_path = '/media/sf_VBox_Shared/kaggle/cervical-cancer/labels/'
image_path = '/media/sf_VBox_Shared/kaggle/cervical-cancer/processed/'
types = ['Type_1', 'Type_2', 'Type_3']



In [ ]:

    
def get_bbox(path):
    with open(path, 'r') as f:
        soup = BeautifulSoup(f, 'lxml')
        box = soup.find('bndbox')
        keys = ['xmin', 'xmax', 'ymin', 'ymax']
        return {key:int(box.find(key).contents[0]) for key in keys}



In [60]:

    
bboxes = []
for typ in types:
    for fn in os.listdir(os.path.join(lif_path, typ)):
        bbox = get_bbox(os.path.join(lif_path, typ, fn))
        bbox['width'] = bbox['xmax'] - bbox['xmin']
        bbox['height'] = bbox['ymax'] - bbox['ymin']
        bbox['area'] = bbox['width'] * bbox['height']

        fn_image = fn.replace('.lif', '.jpg')
        img = cv2.imread(os.path.join(image_path, typ, fn_image))
        w, h, c = img.shape
        bbox['img_dim'] = (w, h, c)
        bbox['rel_xmin'] = bbox['xmin'] / float(w)
        bbox['rel_xmax'] = bbox['xmax'] / float(w)
        bbox['rel_ymin'] = bbox['ymin'] / float(h)
        bbox['rel_ymax'] = bbox['ymax'] / float(h)
        bbox['rel_width'] = bbox['rel_xmax'] - bbox['rel_xmin']
        bbox['rel_height'] = bbox['rel_ymax'] - bbox['rel_ymin']
        bboxes.append(bbox)



In [61]:

    
import pandas as pd
bboxes_df = pd.DataFrame(bboxes)



In [62]:

    
bboxes_df.head()









    Out[62]:






  
    
      
      area
      height
      img_dim
      rel_height
      rel_width
      rel_xmax
      rel_xmin
      rel_ymax
      rel_ymin
      width
      xmax
      xmin
      ymax
      ymin
    
  
  
    
      0
      1950
      39
      (256, 251, 3)
      0.155378
      0.195312
      0.710938
      0.515625
      0.541833
      0.386454
      50
      182
      132
      136
      97
    
    
      1
      3366
      33
      (202, 256, 3)
      0.128906
      0.504950
      0.990099
      0.485149
      0.574219
      0.445312
      102
      200
      98
      147
      114
    
    
      2
      903
      21
      (256, 205, 3)
      0.102439
      0.167969
      0.339844
      0.171875
      0.741463
      0.639024
      43
      87
      44
      152
      131
    
    
      3
      1340
      20
      (256, 165, 3)
      0.121212
      0.261719
      0.437500
      0.175781
      0.793939
      0.672727
      67
      112
      45
      131
      111
    
    
      4
      2581
      29
      (243, 256, 3)
      0.113281
      0.366255
      0.650206
      0.283951
      0.621094
      0.507812
      89
      158
      69
      159
      130



In [63]:

    
print('min:', bboxes_df[['width', 'height']].min())
print('max:', bboxes_df[['width', 'height']].max())
bboxes_df[['width', 'height']].boxplot();
plt.show()
bboxes_df[['area']].boxplot();
plt.show()









    



('min:', width     10
height    12
dtype: int64)
('max:', width     157
height     97
dtype: int64)






    



/home/dafne/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  app.launch_new_instance()






    












    



/home/dafne/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:5: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.



In [64]:

    
print('min:', bboxes_df[['rel_width', 'rel_height']].min())
print('max:', bboxes_df[['rel_width', 'rel_height']].max())
bboxes_df[['rel_width', 'rel_height']].boxplot();
plt.show()









    



('min:', rel_width     0.039062
rel_height    0.046875
dtype: float64)
('max:', rel_width     0.613281
rel_height    0.427313
dtype: float64)






    



/home/dafne/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  app.launch_new_instance()



In [65]:

    
plt.scatter(bboxes_df['width'], bboxes_df['height'])









    Out[65]:





<matplotlib.collections.PathCollection at 0x7fd55a22ca10>



In [66]:

    
plt.scatter(bboxes_df['rel_width'], bboxes_df['rel_height'])









    Out[66]:





<matplotlib.collections.PathCollection at 0x7fd55a10d550>



In [69]:

    
plt.scatter(bboxes_df['rel_width'], [d[0] for d in bboxes_df['img_dim']])









    Out[69]:





<matplotlib.collections.PathCollection at 0x7fd55a45bf90>



In [10]:

    
img = cv2.imread(image_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)



In [46]:

    
img.shape









    Out[46]:





(202, 256, 3)



In [11]:

    
plt.imshow(img)









    Out[11]:





<matplotlib.image.AxesImage at 0x7fd5639a2150>



In [18]:

    
rect_img = cv2.rectangle(img, (bbox['xmin'], bbox['ymin']), (bbox['xmax'], bbox['ymax']), (0, 255, 0))
plt.imshow(rect_img)









    Out[18]:





<matplotlib.image.AxesImage at 0x7fd5620e9310>



In [ ]:

	area	height	img_dim	rel_height	rel_width	rel_xmax	rel_xmin	rel_ymax	rel_ymin	width	xmax	xmin	ymax	ymin
0	1950	39	(256, 251, 3)	0.155378	0.195312	0.710938	0.515625	0.541833	0.386454	50	182	132	136	97
1	3366	33	(202, 256, 3)	0.128906	0.504950	0.990099	0.485149	0.574219	0.445312	102	200	98	147	114
2	903	21	(256, 205, 3)	0.102439	0.167969	0.339844	0.171875	0.741463	0.639024	43	87	44	152	131
3	1340	20	(256, 165, 3)	0.121212	0.261719	0.437500	0.175781	0.793939	0.672727	67	112	45	131	111
4	2581	29	(243, 256, 3)	0.113281	0.366255	0.650206	0.283951	0.621094	0.507812	89	158	69	159	130